{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## The next cell will get a ~65 MB data file 'sequence.index', you only need to run the cell once"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2018-09-18 14:09:47--  ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/historical_data/former_toplevel/sequence.index\n",
      "           => ‘sequence.index’\n",
      "Resolving ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)... 193.62.192.8\n",
      "Connecting to ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)|193.62.192.8|:21... connected.\n",
      "Logging in as anonymous ... Logged in!\n",
      "==> SYST ... done.    ==> PWD ... done.\n",
      "==> TYPE I ... done.  ==> CWD (1) /vol1/ftp/historical_data/former_toplevel ... done.\n",
      "==> SIZE sequence.index ... 67069489\n",
      "==> PASV ... done.    ==> RETR sequence.index ... done.\n",
      "Length: 67069489 (64M) (unauthoritative)\n",
      "\n",
      "sequence.index      100%[===================>]  63.96M  1.35MB/s    in 49s     \n",
      "\n",
      "2018-09-18 14:10:38 (1.31 MB/s) - ‘sequence.index’ saved [67069489]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!rm sequence.index 2>/dev/null\n",
    "!wget -nd ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/historical_data/former_toplevel/sequence.index -O sequence.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Interfacing with R"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/lib/ggplot2.py:67: UserWarning: This was designed againt ggplot2 version 2.2.1 but you have 3.0.0\n",
      "  warnings.warn('This was designed againt ggplot2 version %s but you have %s' % (TARGET_VERSION, ggplot2.__version__))\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "from IPython.display import Image\n",
    "\n",
    "import rpy2.robjects as robjects\n",
    "import rpy2.robjects.lib.ggplot2 as ggplot2\n",
    "from rpy2.robjects.functions import SignatureTranslatedFunction\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "from rpy2.robjects import pandas2ri\n",
    "from rpy2.robjects import default_converter\n",
    "from rpy2.robjects.conversion import localconverter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'rpy2.robjects.vectors.DataFrame'>\n"
     ]
    }
   ],
   "source": [
    "read_delim = robjects.r('read.delim')\n",
    "seq_data = read_delim('sequence.index', header=True, stringsAsFactors=False)\n",
    "#In R:\n",
    "#  seq.data <- read.delim('sequence.index', header=TRUE, stringsAsFactors=FALSE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This data frame has 26 columns and 187720 rows\n",
      " [1] \"FASTQ_FILE\"          \"MD5\"                 \"RUN_ID\"             \n",
      " [4] \"STUDY_ID\"            \"STUDY_NAME\"          \"CENTER_NAME\"        \n",
      " [7] \"SUBMISSION_ID\"       \"SUBMISSION_DATE\"     \"SAMPLE_ID\"          \n",
      "[10] \"SAMPLE_NAME\"         \"POPULATION\"          \"EXPERIMENT_ID\"      \n",
      "[13] \"INSTRUMENT_PLATFORM\" \"INSTRUMENT_MODEL\"    \"LIBRARY_NAME\"       \n",
      "[16] \"RUN_NAME\"            \"RUN_BLOCK_NAME\"      \"INSERT_SIZE\"        \n",
      "[19] \"LIBRARY_LAYOUT\"      \"PAIRED_FASTQ\"        \"WITHDRAWN\"          \n",
      "[22] \"WITHDRAWN_DATE\"      \"COMMENT\"             \"READ_COUNT\"         \n",
      "[25] \"BASE_COUNT\"          \"ANALYSIS_GROUP\"     \n",
      "\n",
      "Columns in Python 26 \n",
      "Type of read count before as.integer: character\n",
      "Type of read count after as.integer: integer\n",
      " [1]\n",
      " \"Column names in R: \"\n",
      " \"FASTQ_FILE\"         \n",
      " \"MD5\"                \n",
      "\n",
      "\n",
      " [4]\n",
      " \"RUN_ID\"             \n",
      " \"STUDY_ID\"           \n",
      " \"STUDY_NAME\"         \n",
      "\n",
      "\n",
      " [7]\n",
      " \"CENTER_NAME\"        \n",
      " \"SUBMISSION_ID\"      \n",
      " \"SUBMISSION_DATE\"    \n",
      "\n",
      "\n",
      "[10]\n",
      " \"SAMPLE_ID\"          \n",
      " \"SAMPLE_NAME\"        \n",
      " \"POPULATION\"         \n",
      "\n",
      "\n",
      "[13]\n",
      " \"EXPERIMENT_ID\"      \n",
      " \"INSTRUMENT_PLATFORM\"\n",
      " \"INSTRUMENT_MODEL\"   \n",
      "\n",
      "\n",
      "[16]\n",
      " \"LIBRARY_NAME\"       \n",
      " \"RUN_NAME\"           \n",
      " \"RUN_BLOCK_NAME\"     \n",
      "\n",
      "\n",
      "[19]\n",
      " \"INSERT_SIZE\"        \n",
      " \"LIBRARY_LAYOUT\"     \n",
      " \"PAIRED_FASTQ\"       \n",
      "\n",
      "\n",
      "[22]\n",
      " \"WITHDRAWN\"          \n",
      " \"WITHDRAWN_DATE\"     \n",
      " \"COMMENT\"            \n",
      "\n",
      "\n",
      "[25]\n",
      " \"READ_COUNT\"         \n",
      " \"BASE_COUNT\"         \n",
      " \"ANALYSIS_GROUP\"     \n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <span>FactorVector with 177877 elements.</span>\n",
       "    <table>\n",
       "      <tbody>\n",
       "      <tr>\n",
       "      \n",
       "      <td>\n",
       "        YRI\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        YRI\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        YRI\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        YRI\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        ...\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        STU\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        STU\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        STU\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        STU\n",
       "      </td>\n",
       "      \n",
       "      </tr>\n",
       "      </tbody>\n",
       "    </table>\n",
       "    "
      ],
      "text/plain": [
       "R object with classes: ('factor',) mapped to:\n",
       "<FactorVector - Python:0x7fb6bce37208 / R:0x55ceac863620>\n",
       "[YRI, YRI, YRI, YRI, ..., STU, STU, STU, STU]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print('This data frame has %d columns and %d rows' % (seq_data.ncol, seq_data.nrow))\n",
    "print(seq_data.colnames)\n",
    "#In R:\n",
    "#  print(colnames(seq.data))\n",
    "#  print(nrow(seq.data))\n",
    "#  print(ncol(seq.data))\n",
    "\n",
    "print('Columns in Python %d ' % robjects.r.ncol(seq_data)[0])\n",
    "\n",
    "#access some functions\n",
    "as_integer = robjects.r('as.integer')\n",
    "match = robjects.r.match\n",
    "\n",
    "my_col = match('READ_COUNT', seq_data.colnames)[0] # Vector returned\n",
    "print('Type of read count before as.integer: %s' % seq_data[my_col - 1].rclass[0])\n",
    "seq_data[my_col - 1] = as_integer(seq_data[my_col - 1])\n",
    "print('Type of read count after as.integer: %s' % seq_data[my_col - 1].rclass[0])\n",
    "\n",
    "my_col = match('BASE_COUNT', seq_data.colnames)[0] # Vector returned\n",
    "seq_data[my_col - 1] = as_integer(seq_data[my_col - 1])\n",
    "\n",
    "my_col = match('CENTER_NAME', seq_data.colnames)[0]\n",
    "seq_data[my_col - 1] = robjects.r.toupper(seq_data[my_col - 1])\n",
    "robjects.r.assign('seq.data', seq_data)\n",
    "robjects.r('print(c(\"Column names in R: \",colnames(seq.data)))')\n",
    "\n",
    "robjects.r('seq.data <- seq.data[seq.data$WITHDRAWN==0, ]')\n",
    "#Lets remove all withdrawn sequences\n",
    "\n",
    "robjects.r(\"seq.data <- seq.data[, c('STUDY_ID', 'STUDY_NAME', 'CENTER_NAME', 'SAMPLE_ID', 'SAMPLE_NAME', 'POPULATION', 'INSTRUMENT_PLATFORM', 'LIBRARY_LAYOUT', 'PAIRED_FASTQ', 'READ_COUNT', 'BASE_COUNT', 'ANALYSIS_GROUP')]\")\n",
    "#Lets shorten the dataframe\n",
    "\n",
    "#Population as factor\n",
    "robjects.r('seq.data$POPULATION <- as.factor(seq.data$POPULATION)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAHgCAIAAADytinCAAAABmJLR0QA/wD/AP+gvaeTAAAXwklEQVR4nO3dzW9cV/kH8HNf5tV2HWJDitiirln9pLRi1zVSRRcsEELUFVlFbHA3qP9Dw6ZIobIQi7AoEguQ+AOqSMhLhMQGqQJEK9K4uEnGGc94fosBy7Wdl84cz33c+XwW0dxJcu5z375z7rl35haTySQBEE/ZdAEAnE9AAwQloAGCEtAAQdVNF/BsDx8+zNhaURR1XR8eHmZsczZVVaWUxuNx04WkVqs1Ho+Pjo6aLiS12+3hcNh0Faksy7IsR6NR04Wkuq4nk0mQnWQ0GjV+T0Go47coiuw7ycrKysnJSxDQg8EgY2t1XXc6nf39/Yxtzqbf75dlmXfpZtNutw8PDxtPxqIoVlZW/vOf/zRbRkqp0+l0u90Im2Z1dXU8HkeopNfrPXr0qPEPraqqut1uhOO31+tVVZV905wKaEMcAEHN1YP+6KOPUkovvvhiSung4ODu3bsppevXr3e73Xkm514ogC+D2XvQn3322TvvvPOXv/xlOrmzszMej8fj8c7OzpyTAKR5etD9fv///u//jidff/31zc3NlNKdO3dSSru7uzdu3JhtEoA0T0BXVbW+vn48ubm5eXBw8ODBgytXruQoLL377rv3799fW1t78803szQ4Nb1Av7q6mrHN2dR1XRRFhEqmF17a7XbThaSUUpAVUlVVhEparVZd19MbfppVFEWv14twF0eQo+Yijt+zF+qz3cVxcHDw+9///u9///vbb7+dpcFr1671er1+v5/3HqPJZFLXdYT7lsqyLIoiQiWTyeTo6KjxSoqiSDHuOyyKoizLCJXUdR1h00wdHR01fi9mWZYpxk4y7eplT6dT72QL6J2dnW9/+9vf/e53czX42muvTV/cu3cvV5sppbqu2+12hPuWpikQoZLp3ccRbrPr9/sRVkin0wmyaaqqinOb3ePHjyPcZtfpdCKskJTSRdxmd0q2gP7jH/+4u7s7fX379u3t7e2tra2U0vb29vTP558EIKVUND6o9EzZe9Bra2t7e3sZ25zN9IsqDx48aLqQtL6+PhgMIvSgNzY28m7u2Uy/qBLhKzNxvqhy9erV/f39CD3o9fX1+/fvN1tG+t8XVbIfv9NbLY75ogpAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoC7BE1VgNjdv3mxq1rdu3Wpq1nyZ6EEDBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAENEBQAhogKAENEJSABghKQAMEJaABghLQAEEJaICgBDRAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAAwRVN13As62srGRsrSzLsizztjmbuq6LoohQSVVV3W631Wo1XUhKuTd3U3ItRV3XVVWVZfMdqaIoer3e0dFRs2WUZRnkqKnrOnuSHB4enp5LxtYvyMOHDzO2Vtd1Xdd525xNv98vyzJCJXVdHxwcDIfDZsuYRkCEFTK/XEuxuro6Ho8Hg0GW1ubR6XQGg8FoNGq2jKqqWq1WhJ2k1+tVVXXRlTT/yQzAuQQ0QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAENEBQAhogKAENEJSABghKQAMEJaABghLQAEEJaICgBDRAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAENEBQAhogKAENEJSABghKQAMEJaABghLQAEEJaICgBDRAUHWuhg4ODu7evZtSun79erfbnWcyV0kAl1q2HvTOzs54PB6Pxzs7O3NOApAy9qB3d3dv3LiRUrpz586ck1Mff/zxaDSqqqrT6eQqMqVUVVVRFFVVZWxzNkVRxKmkLMvG
KymKIqXUeBlZ5FqKIJtmKkIl0wIaLyOlVJZl9hVydHR06p1sAX3lypV79+6dO4/Z/PSnP/3HP/6xubn5m9/8JkuDx4qiuHLlSt42Z1MURbvdbrqKVBTFyspK01X8V5BNM6dcS1EUxWQyiTDuVxTF2tpa01WkFOz4bbVaGRscDAan3skW0G+//fZ7772XUnrw4EGWBn/1q19NX0xzP5e6rtfW1vb29jK2OZt+v1+WZa7VNY/19fXBYDAcDpstoyiKjY2NTz75pNkyssi1FKurq+Px+Oxxu3hXr17d398fjUbNllFV1fr6+v3795stI6XU6/Wqqsp+/Pb7/ZOT2QL6t7/97U9+8pOU0p///OdcbQIss2wB/fLLL29tbaWUtre3p3/OPAlASqmYTCZN1/AMhjguWqghjoyb++bNm7ma+qJu3bqVpR1DHKd86Yc4Njc3T076ogpAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAENEBQAhogKAENEJSABghKQAMEJaABghLQAEEJaICgBDRAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAENEBQddMFPFtRFNlby9vmPCJUUvxP42WkGCtkfrmWIsimOdZ4JaF2kgVsmksQ0N1uN2NrZVkWRZG3zdnUdR2kkqIoWq1WWTZ8OjXd1yOskPnlWoqqquLsJJ1O5+joqNky4hy/00MmbyWj0ejUO5cgoAeDQcbW6rput9t525xNURRlWUaopN1uD4fD4XDYbBlFUfT7/QgrZH65lqKqqvF4HGGd9Hq9x48fn02QBauqqtPpRFghKaWqqi66EmPQAEEJaICgBDRAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAENEBQAhogKAENEJSABghKQAMEJaABghLQAEEJaICgBDRAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAAwR1fkAXRfGUSQAWoD41fZzFQhmgWacDejKZpJSKopi+eH4HBwd3795NKV2/fr3b7c4zmWO5AC6984c4vmg6p5R2dnbG4/F4PN7Z2ZlzEoB0tgd97NQQxzMje3d398aNGymlb33rWycn79y580UnAUhPCugZhjg++eSTN954I6X01ltvbW5uzl/Zu+++e//+/bW1tTfffHP+1o6VZVmW5erqasY2Z1PXdVEUESqpqqrb7bbb7aYLSSmlCCtkfrmWotVq1XVdVVWW1uZRFEWv15vh3Dp7GUGOmos4fofD4em55Gp6Y2Pj9u3bKaWtra3pizldu3at1+v1+/3xeDx/a8cmk0ld13nbnE1ZlkVRRKhkMpkcHR01Xsn0pK3xMrLItRR1XUfYNFNHR0dHR0fN1lCWZYqxk0y7etnT6dQ75wf0ZDKZoROd12uvvTZ9ce/evYzN1nXdbrcHg0HGNmdTFEVZlhEqabfbw+Hw7Kf3ghVF0e/3I6yQ+eVaiqqqxuNxhHXS6/UeP348Go2aLaOqqk6nE2GFpJSqqrroSp44xJE+Pwz9zLDe3t7e2tqavphzEoCUUsPd5OeRvQe9tra2t7eXsc3Z9Pv9siwfPHjQdCFpfX19MBhE6EFvbGxk3Nw3b97M1dQXdevWrSztrK6uBulBX716dX9/P0IPen19/f79+82WkVLq9XpVVWU/fk9dwHtaD/qk+DkO8CXzxDHok5O+VQiweH4sCSCo5xriML4BsHjPNcQBwOI911e95TXA4j3XV70b/9IKwBJykRAgKAENENTTfovj5OSi6gHgv554kVAoAzTriUMcHk4I0KwnPtX7uAd9argDgMVwkRAgKBcJAYJykRAgKEMcAEEJaICgBDRAUAIaICgBDRCU
gAYISkADBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAENEBQAhogKAENEJSABghKQAMEJaABghLQAEEJaICg6qYLeLaVlZWMrZVlWZZl3jZnU9d1URQRKqmqqtvttlqtpgtJKffmbkqupajruqqqsmy+I1UURa/XOzo6araMsiyDHDV1XWdPksPDw9Nzydj6BXn48GHG1uq6rus6b5uz6ff7ZVlGqKSu64ODg+Fw2GwZ0wiIsELml2spVldXx+PxYDDI0to8Op3OYDAYjUbNllFVVavVirCT9Hq9qqouupLmP5kBOJeABghKQAMEJaABghLQAEEJaICgBDRAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAENEBQAhogKAENEJSABghKQAMEJaABghLQAEEJaICgBDRAUAIaICgBDRBU3XQBwNK5efNmU7O+detWU7OegR40QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKCyfVHl4ODg7t27KaXr1693u915JnOVBHCpZetB7+zsjMfj8Xi8s7Mz5yQAKWMP+vXXX9/c3Ewp3blzJ6W0u7t748aN2SanHj58OB6Py7IsiiJXkSmlaWt525xHhEqK/2m8jBRjhcwv11IE2TTH4lQys4yLsIBNky2gNzc3P/jgg/fee++tt97K0uAPfvCDDz/88Gtf+9of/vCHLA2etLGxkb3N2QQZ0llbW2u6hP+Ks2nmkXcp+v1+xtZmduXKlaZLyCDvpul0Ohlbe/To0al3cv5Y0iuvvPLKK69sbW3dvn17/tbef//96Yt79+7N39qxuq7X1tb29vYytjmbfr9fluWDBw+aLiStr68PBoPhcNhsGUVRbGxs5N3cTcm1FKurq+PxeDAYZGltHlevXt3f3x+NRk0XMq9cm6bX61VVlf34PfVhnHMMOldTAKSMPeiXX355a2srpbS9vT39c+ZJAFLGgH7ppZdOjmzMMwlA8kUVgLAENEBQAhogKAENEJSABghKQAMEJaABghLQAEEJaICgBDRAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAENEBQAhogKAENEJSABghKQAMEJaABghLQAEEJaICgBDRAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoOqmC3i2oiiyt5a3zXlEqKT4n8bLSDFWyPxyLUWQTXMsTiUzy7gIC9g0lyCgu91uxtbKsiyKIm+bs6nrOkglRVG0Wq2ybPh0arqvR1gh88u1FFVVxdlJOp3O0dFR04XMK9fKnB4yeTfNaDQ69c4lCOjBYJCxtbqu2+123jZnUxRFWZYRKmm328PhcDgcNltGURT9fj/CCplfrqWoqmo8HkdYJ71e7/Hjx2cT5NLJuDKrqrroTWMMGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAuwX3QPI+bN282Netbt241NWv4ctODBghKQAMEJaABghLQAEG5SEhmLldCLnrQAEEJaICgBDRAUAIaICgXCYHlFfyath40QFACGiAoAQ0QlIAGCEpAAwQloAGCEtAAQQlogKAENEBQAhogKAENEJSABghKQAMEJaABghLQAEEJaICgBDRAUAIaICgBDRCUZxLOpcEHmqXne6YZcHnpQQMEJaABgjLEAcuiwRE5w3GzCRHQBwcHd+/eTSldv3692+02XQ5ACCECemdn55vf/Ob0xY0bN5ouB3LSb2VmIcagd3d3X3311VdffXV3d7fpWgCiCNGDPtevf/3rvb291dXV733ve2f/9o033lh8SVO//OUvm5r1KSsrK02XkFKYMpJKzghSRlLJec5Wcnh4eOqdYjKZLKqeJ9ra2rp9+/bJF+lZAT2zsizb7fbBwUHGNmdT13VRFGc3
yeJ1Op3RaDQej5suJPV6vcFg0HQVqaqquq4fP37cdCGp1WpNJpPRaNR0Ianb7Q6Hw6Ojo2bLCHX8lmU5HA4ztnl4ePjCCy98bi4ZW8/r+9///vTFvXv3MjZb13Vd1w8fPszY5mz6/X5ZlhEqqev64OAg7642g6Ioer1ehBXS6XSKoohQyerq6ng8jvCh1el0BoNB4x8VVVW1Wq0Im6bX61VVddGVhAjo7e3tra2t6YumawGIIkRAv/TSS8cjGwBMhbiLA4CzBDRAUAIaICgBDRCUgAYISkADBCWgAYIS0ABBCWiAoAQ0QFACGiAoAQ0QlIAGCEpAA0Q1WTJ//etff/zjHzddxWQymdy5c+cXv/hF01VMJpPJW2+99ac//anpKiaPHj36zne+MxqNmi5k8sEHH/zsZz9ruorJZDL5+c9//v777zddxWQymfzoRz/629/+1nQVkw8//PCHP/xh01VMJpPJ7373u3feeeei57J0PejDw8OPP/646SpSSml/f//TTz9tuoqUUvr3v/8d4Zkdk8nkn//85yTAM9gGg0He5/jM7NNPP33w4EHTVaSU0kcffRTh8Wyj0ehf//pX01WklNJnn322t7d30XMJ8YP9i9Rqta5du9Z0FSml9MILL0QIo5TSV7/61V6v13QVqSiKb3zjG0VRNF1I6vV6m5ubTVeRUkpXrlxZXV1tuoqUUnrxxRdbrVbTVaS6rr/+9a83XUVKKa2trX3lK1+56LmEeGgsAGct3RAHwGUhoOESKIoiwuAPC7ZEY9BF8bnxnFOTiyzj3PcXX0yQSoKUEaqSk6ZVNVjD8ZFycv00snUiHL8LrmRZxqDPXYkNbuMgGj/4eYrjrRMkjE4m9eI/xYMcvwuuZIl60Jx1smckpuOIuUWi1bMMBHQDTvaMpu80u+ufOoFtsFfS7El0CrNpfHAyJaAX7dzzxGYHW5oNglPLfjapF19JkE3T4KogiGUJ6JN9opNvNlJMHNH6aHEqiaPZ3nSQAydIGYuvZFkCOjn4zzjez07tcFZUQA1ulCD7Q5Ay0mIrWaKAjuNJybhgcfb4OIJsmpg3/LF4S3Sf2dkrUcuz7MEFOXuNrKkhjjgfFXGO30VWsiwBHe36z0kNDgTHufmfJ2n8OkHjBaRIx++CKzHE0aRQt0+kpr8TwSkRkjE1fZVyyQnoZpy933aZBbn7OI6AaRhkeGHZLEtAHx/8jXcHAh57zYpz9hpNqKH5ZvfbOMfvgitZloBOn1+Jjd+0JKZ5ilA7RpB9Ncjxu+BKliigQ4nwJbE4N/8Tlpvlm7UsJ5Jx7hY6K0gPpSmRN01TrJPIFvnTMcsS0GnpcxCY34J/f3WJhjgM/p7l9gmeTl/+KRawEpYooKfcLXTM7RNhxYlFO0Ozli6g9aCJ79S9XCytJQpoezyXSJwRuQg1xLHge5+W5Xw2zjljHNbJZRHkYTd2jMVblh60fess6yQ+vdcltywBDZeLaI7p3PNOQxyZOV8jsjijT3Eqiezi8mRZcsp+BlwQX1SZ16lfOtaDBuJbloBO7i3l8ljwQOcXrSQ5iBZlGTuSYprLqKnTPsfLSS4SAudodlxOTDdiWYY4Tg1AT1/Y2+A5+RGbqQV/UC1LQB9zqRBmoAc9teAPqqULaIgvzkXCJJrPs7DT8SUKaM/P5rKIk4YeefUUCzgFX95zfOMbwBe1gMdcfW52QgouBV2KaBawRcoLbT0swx3AnKbffZu6oFksyxj02TXo0gcwG3dxZOa3OLhEnOGFteCO3XLl1MmHWC/VggOX0bL0oKdO3WQOEJmOJFwOTvuW0JLexQEQn4AGCGq5xqDhUnCZhCmjWgBBGeIACMoQB4TjSYBMGeIACMoQB0BQAhogKAENEJSABghKQAMEJaABgnIfNJfDuQ/rPPdR06d+Un36I3Bn/+VT/u+puTypmCc9/+Hpkyff8ahsnk5Acwk8JfKeM9SOA/Hkvz/3/17EY3dOPc3n3NnBWYY4uHye
GWpnO8gXUcO5szi3ww6z0YPmcnvOQYnn/78X8TzQ46esnY3veernS09Ac7k9KdSe58mTcw5xnJ3F8eTzP/dSKPMUAhpyesrvHHlmFV+UgObyef7O6SJHop9ytwbMRkBzCZyK2lOjCue+/zwijP9GqIGwfMIDBKUHDU9zdpBEn4aF0YMGCMoXVQCCEtAAQQlogKAENEBQ/w+NkUmSHMQLFAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<IPython.core.display.Image object>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ggplot2.theme = SignatureTranslatedFunction(ggplot2.theme,\n",
    "                                            init_prm_translate = {'axis_text_x': 'axis.text.x'})\n",
    "bar = ggplot2.ggplot(seq_data) + ggplot2.geom_bar() + ggplot2.aes_string(x='CENTER_NAME') + ggplot2.theme(axis_text_x=ggplot2.element_text(angle=90, hjust=1))\n",
    "robjects.r.png('out.png', type='cairo-png')\n",
    "bar.plot()\n",
    "dev_off = robjects.r('dev.off')\n",
    "dev_off()\n",
    "Image(filename='out.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Get Yoruba and CEU\n",
    "robjects.r('yri_ceu <- seq.data[seq.data$POPULATION %in% c(\"YRI\", \"CEU\") & seq.data$BASE_COUNT < 2E9 & seq.data$READ_COUNT < 3E7, ]')\n",
    "yri_ceu = robjects.r('yri_ceu')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAHgCAMAAABKCk6nAAACKFBMVEUAAAAAv8QEBAQMDAwNDQ0cHBwdHR0eHh4gICAiIiIkJCQzMzM3Nzc4ODg6Ojo7Ozs8PDw9PT0+Pj5JztJLz9JOTk5QUFBQ0NNTU1NT0dRVVVVWVlZbW1tcXFxeXl5gYGBhYWFiYmJjY2NlZWVnZ2doaGhpaWlqamptbW1xcXFzc3N0dHR1dXV2dnZ3d3d4eHh5eXl8rgB/f3+BgYGEhISFhYWGhoaHh4eIiIiMjIyPj4+QkJCSkpKT3uCWlpaXl5eYmJiZmZmdnZ2enp6ewkae4OKfn5+fwkegoKChoaGioqKixE2k4eOox1upyF2qqqqrq6usrKysymOtra2urq6vr6+vy2ixsbGysrK2tra2z3a40Hu5ubm6urq7u7u70oG8vLy9vb2904W+vr7CwsLDw8PD6OnExMTE6OnFxcXGxsbG6OnHfP/IyMjI6erJycnNzc3Ozs7Q6+zR0dHUn/vUoPvU1NTVo/vV1dXWpfvX19fZ2dnb29vc3NzfwPfg4ODi4uLjyPfjyvbj4+Pk5OTl5eXn5+fp2/Tp6enq2vXq3PTq6urq7uLr3vTr6+vr7uTs7Ozs8PHt5PPt7e3u7u7v7+/v8Ozv8O3w8PDx7/Lx8PLx8fHy5OTy5uXy8fHy8vLz8/P0xsL09PT1q6X1ubX1vbn1x8T19fX2mpT2nJb2npf2o572p6H29vb39/f4dm34+Pj5+fn6+vr7+/v8/Pz9/f3+/v7////5KPUeAAAgAElEQVR4nO3diZ/exlkH8FekpWvoGWgpacmma9pAqFveQEnYlDrEYIw50uAYCNAF43LEDISzhJiyQNNgLuGSLBgW6mU5XI73ffey9e+hGc2tmdGMNBrpfff5fZLdV+9Ir/bV1zM6RsekgKx0JkP/AZB+A8ArHgBe8QDwiqcF8L7I7GDfldk9Z/HhzFk8d5beO3QXuz/7wP3h7tL9I+l1fJK4aQGci+wd5q7s7jiLj3edxTNn6c6Js/itPWfxfL/DrG8Xt8VAfJK4AeDwWQMwCwAPHwAOnzUAswDw8AHg8FkDMAsADx8ADp81ALMA8PAB4PBZAzALAA8fAA6fNQCzAPDwAeDwWQMwCwAPHzPw/Pr1uTZ0vUw1KH1VAF5S4PNbW+e1oa2trQ9Xg9JXBeAlBS7/7I3i9tkbRXHjBn6J/ysuvkYLRVYTGKGVBy5+7SM3i8fyp2/lV6/mFPjaNVzw8W/7jmOR+w+OXTk5cRY/cBffd392w6zdU993FSPknvi4EC8PUmK1iXUja6N4ZHNzd/Pcuc0KeP7Cq/jt33vld+6IzI7uuHJvz1l8su8sPnCW7t13F8+cxYdzexkq45z4TrHLX+6ko2oXM/BFTLpRbN0l9ZYA33qGFUqN1Uo20RjYOfEKNNE3NzZuFq+dfemkmM/p0OkBbpz1CgA7I31VAAZgRwA4QQA4fNYAzALAwweAw2cNwCwAPHwAOHzWAMyydMDsAAcA0wAwADsyOmB+jBKAaVYLGAEwifRVVw8YecwagFmWDNh31gDMAsDDB4DDZw3ALAA8fAA4fNYAzALAwweAw2cNwCwAPHwAOHzWAMwCwMMHgMNnDcAsADx8ADh81gDMAsDDB4DDZw3ALAA8fAA4fNYAzALAwweAw2cNwCwAPHxOO7D5an4AplkJYIMwANMMApyRnwDsl+UDzsrk0ET75rQDB82aBoBZ+gLGvwDYL8sHTAPAfgHg8FkDMAsADx8ADp81ALMA8PAB4PBZAzDLOIBNxzIA2BHpqwIwADuSBth4tLkLcPl5AFxlxMDum367Zo3UD4xPEjcrD2zuLpq7b+veBAw1uMoYgM39Re2B8YSwDqYZBXBugmy/DgZgKSMANlfUpudu2GdNGmgAphke2CLZGhgBsJwxA7eadTUVALOMFrjtOrj6MABmGQWwsbgdMAJg
NYMDG31xrW4NjD8PgFmGBjY30G2B+YcBMMvKAcNGlpr+gBeLRTOw3bfVOhiAa+kNeNENuOVGFjTRegYGNqc9MAsAs4wSmASAHdkXOTjZd2U+cxbfnzuLj+xFpe/sgXPi2YGz+Piw7axxpEWwF58kbloA3xbZO7ztyt07zuLjXWfxzFl658RZvLOHDxpbc3Cvw6zfLN4UA/FJ4mY5m+i8eTfJ3Z8Q2ESrn7TqTbT0VUcMvGcC5u+FAWsfBcAsw9Zgw7u8WgcDy58GwCwDAjv2gXGCgPXPAmCW0QHzlWkIcO2jAJhlWGBXMQA7In3V8QLHPNCh/1sBYJZ+gBfkpxsYwZEsmuUDXiyIsAsYN6oAXAWAA2ZNA8AsfQDjfgYsDMB+WUZg8rsBGNbBNMsGzCowXOHvmWUD5kkBbNmVBmCWJQe2HSwBYBYAHj4AbJ01snUpAzDLUgMjAF5pYLsvAPMsN7C1FIBZBgBmLl2BXd2NAMySHpg3qx2B7e1zDsAiADx8Vgw4jwPsPh8EgFmSAwuYTsDO+gvAIksK7NhDIgFgFl/gzFg8FHADLwCLeALTB+XoCQdG0YCdEwMwix9wFgtYlgFgmhEAR6vBUYBJAw0d/jSj2shSal4nYDijg2X1gBEAjxVYXXV2AYZzsnhWDhgB8FiBUQxg9hkATDMyYGmwFTACYBzpq44HWD/81BrYY9YAzJIYWB5uA4wAmET6qgAMwI5EBK4dXmwBLLXyAEwzJmC1OBxYXosDMM04gJGhfyAYWPmAbsATeaFqC3iCi6sR6G/2i5VK001ImTzSRB3JIysAXOJOOwOr/0Q6AU8ahmSkiXFQTKcXStN7ZmWAteLhgEml4/VtIn5Ims3AE/6GWjg5fcBk3TntCKydpROhBjMLLjLhUFqbbGyiG4D93ZYfuNSZltGKw4D1s7A6A7N1pwxcqDY1UXlwov4vFU5OIfA0CrD/rJuBBURbYKlen3JgZKzAYcC10+xiAU9aAmv/NtT19WkDNp8EGQJc/4QITTRd60pbWryMjSetfKVBQ+uujKtMtPrAaHzAkRd3x89ZBWBTcQCw4V9If0ey4ggHfMqyA+eGfWCcQOCgWcOhSpYUwNOuwKY2AIBphgfGm9CdgI2r8C7AR3M1RxGUOmTZgU27SDi+wOZ1eBfg+V018whKHbIKwCZhT2DLNjgA0wwObL2OKAA4eNYAzDJ6YHP9BWCeoYHtFwJ6Adt8owN/8fLlF4v9y+TnywX5P1UAOHzWwcC3v/vChc88t/3eCxeeL7bXC/J/qiw1sN3XC9jqGwN4MplwYAx6/NcVKwCzWPZ/WLoDW7agcXoALn8+9I53vGckwPPr1+fa0PUy1aD0VXsEtu3gspTADl9vYHNxP8AjqsHnt7bOa0NbW1sfrgalrzpsDe4G7PCNvQ7GoLefGxNw+WdvFLfP3iiKGzfwS/xfcfE1WijSL7Bz4p0Tl1AzsHPqyMAnz66tvev29kNra++lr3sU1WJZB8/zJ4rH8qdv5Vev5hT42jVc8P2f+L4DkeP7B64cHTmLH7iKp9MT58SHD7CQvfjYOXX1r8M6uXPWCBViYFZfdMuwHzx/YXO3eGRzc3fz3LnNCnj+wqu45HNXXtoTOTjec2V+z1l8MrOXlQ30kXPiexjIUXzgnPrIPbVr1uqUd+vLbhmAz3+hwKhbd0m9JcC3nmGFUmPVXxPdtI2V77ia2MYm2rEFjeNoosmES38k6wMbGxvFa2dfOinm5d93c2PjZmLgqa0TQV3Q9mI3cMPEqw/sjPRt+wV2TtxA5ARuqL+NwM6NrJMjNScRlDpkpMANSznvDtx21lPUsBV9cqwGgFss5WYjF3BT/XUDT6E3iWecwI0NtGPW5MmJAMzSH3AjUR/A5VQYeArALCMEri4WR22ePlrxQg2W0hswNiL9wdYxLMCs9rYApvU3b3Go8sUCd/LjDv+0nf04Sw3sqMXRgVEo8EQAf+a5onj887jD/1zSjgacpQTG
C5v2B9tGMQOj1sCYl82sVXfhmvQzZZYZOLg3CbUGXpDqS+cWDFxcurRWdfh/GYDLTC1LmQV1BM6DgUtdDEz/MB9gcgU3B/6Vp7+9gBrMQw9SOoHbPJwSdQGm61+cFlvR69sAzDNtAEZtgaVDWGHASKq+OQCLtAaeGpaytLix0k74fjCekDIFAZMNLKnzA/aDWXrZyKK1cCf8UKV0BCsEuHbkC4BZ+gQO7U1CSh9DAHD9wCYAs4wIWNrAwgmswWopALP0ASx8nRObgfmgNzCeSJ+TG/jwnprDCEodMjpgsS3TBByyDkYtgY0dT27gB3oiKHXICIGZsKVLh/kG7iapUp7A5o5FaKJZWgBPvYGDmuipNoEfsNkXgHn6AbYveSnGJloM+wMbSgGYJT6wqMBBR7LIIQ75LE0vYNs/IwBmib8VLVXgUGCFygcY2c6+B2CWPoFDanC9JnoAo3jA+5e/SM7keHk/glhglgyYpDoW7QuMTE1tMzCZxnzyffjlo5NLpJMBdzmkzpIC+/YmTfVDWDSNwLVppMFw4A9+7PUS+Mr5d0cQC8wyAdNlHNJdOJ22A65P0gl4/YlXttdPnv3se74cgSwsE+mnZ6RvHRtYaRJNwGIfyQuY++rFPjVYn3UX4GJte337fZcvr8VRC8gyAVdPR6ILOgi4VtwEzM/eqKJ+SBvg5z+0/vgFfDOlOGz+kZ4BMZn4OUtfPDKwutWqA9MbB4cA57ad2QbghQFYDLW6R8dkHdfe1OdzjKsGT5uAp9Ky9gS2HKxoBq4Ji4El3A9eBuBpG2DbwSg38KIGrGQJgYMifdXowPJg7cy3ag0cAkzGNu3LNgM7ipcQeBzrYO3AUe3MNySvDL2BjcVtLj7jWUJg+np4YPtSbgNs73TqEfjBfTWj6fD35B3oQIfWQHsBW3aRcHoEHlnEc7e8J5G+6hDA9A1PYEvxaQMO4B0GuObrAWyvv6cRONlGVpZl7GUAcHWQIwjY0UBbgMXYKwccFOmrtgNmwv7A2j4wjh+wrdgCzMafqYNaAJjFvwabFmYNWBFrAnb6GoHRCgOHtNBd18HcVwOuHZAQwAbfJmC3r98pOysEbBqwRvqq0TaykHZSHF/KJOHAzhVwbgBWj1wBME1E4PrpTyrwFIUCu8p1YO3Y5KpuZA0GjDlqh4z5UhYV2Be4qQKbgGVhAKaJBGx+BjC/fkRUYE/gJt4asN59tHLAKTeyREQNNj4idsYLp6je6DYBBzzi/RQAB0X6qpGAzacfs+tHSAtdq5QOYNQOWBIGYJo4wJa7f8/k0lqjawcmvgGPeNc2sfB8Vg940CbacgHBTC7tG5gLkxmtHPBEHmiK9FXjABt2kXCqpcyqt77VZAWuGvMQ4AUAK5G+aqytaOMmrwA2ldqA6co6qAbr0wMwS//dhdanc/QFLGZtDQCz+O8mWZay9QI/KzDb2vYFNp9dt3LAg25kmQ5T0qWMUCAw5iW9GQBMM/xuErIDI1MfBI0dmLzwBLacIAvANJGAzYotgMXOlB+wfoBDzNqVZQSeSK+bIn3VCMB2xJnzCYYmYBQKbEs3YGVdpy1UsaDJWJOi/gP/nPBSZc1J16NsdTqZKB8iD2rzH3AjCzma6CnrZjCUGoBRGLCjO6ITsGkBGxa3Cbig74lfihiXFL/03zXhUQDblrL5IGUVCzB77QNsvzylCzCuRqyi0QpY/ZAsJoU0PJFUZDr5leqzTMCOnr0ZCgNGYcCuHsUINZj5CcWJUmQGpgOWJpoDa00z+83Gra8ihgW2LWVk7EaiqQGrIzYBk7GtVbgzMKnGbEhTlNtfrb3lwNp7yic7a7AFOGg/WHoadtMTwB2P+N6rngBOevYsj9rGa1/rCPfu1x/MLY3p8wRw+zPAD50T70mLoP4EcBnPDDyhTTYF5oMCpP062AYckrdE9o/ecmXvrrP4eI+c3GorxttX1dmvptLd++qwNuKde85ZO2aLM3dOvFPs8NdvOoEnBmClmZ4o
gwmAfbylxqpzE+1oockGtH0ErYnWz+jp7QnguWcTTde60paWwlAHZrs+1bB9N0lSN+0u6YgjALYsaUtHP4sBWB5ufgK46y/r7UBHiwYzPCMC1uudHLevBlz7nMYHRA8D3FI4ZBNJm8UIgM2FxnNlpSjAdbIm4FPWXWgbNEb6qj0DWw5ikcjA+knxuQuYjHva+oNtg8ZIX7UjcPs1sApsGM0KXFX2U1uDfSJ91QjAljLeQtsmloBN61QncK+dDSN96krASlz6qr0Cz5ybQgLYuM1kfcR7NfKpu8uOtAvWGOmr9gbMr1axTqwB68WuB0Tnp+82SvpBT2ekr9oN2A6I8H0Aard8VcKBzSvyqE8f1QLALB7AZsHpNMucvALY8iH25weTlwDsiPRVOwHbfXE3Upa5j0ZQYMMeEokBWB7z1AEPcStD9xq4wZcB47FM54MMDfwv/734n38dD3BQpK8avQaTW7Sg5k0sBmxtBerA8qh9HuiogP93sVj8HwN+/cyV4uGi/O/1M2deL86cufLww+S9FFF6loavweQmS6QCN6yCK2B7K298diESr/sG/loJ/DUG/PV/+sj2lz760S8Va/gh72vF+vb2On4vkqEzwjT1frDNNyPVNwDYqwbL4/V7JKsC/o8S+D8Z8EOPPprvP/30vgyM34tk6Iw40OHdWktftSuwoYXOeDeSeyljYLuv8dmF8kDfwF/9t3//r68y4LXiuduPf/7zj8vA+L1oio6wjazCf3UsfdUOwEYb0UI3rIEJsMPX+Wg71Gtng2Er+saZ88fE9saZMzcoMH4vHqM9g9VgKw73bazBDt/6o+3UEU/bbhJ5NSLg3AfY5Rvh2YX2LClw2q1ovMzFXSuliAv6G4BdDbQCbBrvdAJ7Rvqq3YCRDTjzWMoN+8nSwylNI542YP38TGekr9oR2PKA6KnPUm7YCuv2YKyVe2ZDImBlQ9YOTHeBnUuZHKJ0lA8HfHygJsm2sj1DAk/rwJl0kNK1lDPnCjjv+GAsaKJZwoC1Yw2Z4SYs8plYrqXc5Cs/GAuAE/UmSQu7WvB1YLkCu5ayewsaZ7jnJo0SOCjSV225kVX19Rpq8DQE2DlrAKYZENj4eFmpn9++lAncWB+M5Qu8/7LpLfHui+EyprBzOSb+51VKX7UdMD7irAFXxzzUc6GtS5mMMdrnJlXA//CLP/bL/+gENjxKuHxLvBvpYeHKRpZfpK/aBVi+EVqW4ZvdILWf37aUkR+wcz3dO/DPlV/l5xmw2uH/8JXyR1G8421XzuD3r1wpi6sTAMq3yv/wq/L/5QVGtALrwAvtyQyWpYz8gN1r6d6Bf6j8Lj/CgNUO/697oqAPAz8p3ydDT3wdGaeqwfgVeWtZgbN6Dc45sHQ5Uifghu3s3oF/qvwqP82A9Q5/BvyDj37DdjW0RsapgPGrtdhNdMqT7ggwXucqG1mLHJ/LIR+BNC9lJufxWB1Hee/Af/MT05/8Owasd/gzYNwzTIaeexsZpwLGr8hbEYGDIn3VNsCI1t/a3Wbpthd3MS5lXjObn7ri+suG7fCvgB96oXz/3Wuk+O1knPKt6t3j31x79u3dcXHSA1ctNH6lACO6avYAJsegG4Gdf9kIdpNETra33xXO4JkBajBXNAO7ljISEzc9lAPRl+ZRRgV8e329v9OzkgOjGnB1LjQ+tIUagOUtp4ZnNiB+LNo4zqiAe81AwOSlAK5otTPtWgJXI70FwFWGAa5O2cDAWbVOzhmyC1gZwXlLfyR6k8xj9Qh8cqjmJIJSh6QGllroCpjuM+mb0IaljHyA2SjD3U74/oma+xGUOmSYjSxeg7OMV2A/YH4WhwsYDQkMTTRnPN7NZGB9hTkzTCjGMN4QXLTiAEyTGjhTgHO2WzzNstppkupS1nztd3yvRgFgmsTArJ+BRKyDjZeLKktZ97Xf8R1VLwG4yiiAUf0B3/pSrhVbbgjOfEd1JIv03dMO/Ej9+N5JD5zJwKIC1+9qJy/l
WgV23/F9BMC/+gO/wYFJx9DzVXGkTiLvpAUuLQ3AZOu4blIDVkrNd3wXwAM30T9cfq8f58C4i5/06T9cva7OAai6/6sTAUjP/5mqqz/q5f/JgaWnudOtaFY99TqnA6vnuevA2icMvZH1TeUX+2YGTDr116qOfdrdX77+EulWeuebb76T9PGXBSesqz/i5f+JgauL92kqYO7rADaUG24nLI8wNPC3lN/sW6Umeo11BNPOQvr/WrG9vr7N+vj5CQARL/9PCkw3l9lgBWzzlZayqdxwO+ExAf96+c1+qwZMOvYpLu7VL3/tPfXUnhjCJwDQ3v/utiTJgTPBcJz5ARuL9bvNamMMDXz3K7/7lbsaMOnYp8C4V590/xeXLvEzAvAJALz3v7stSUrgrGqh+br0WPG1ApuLa3ebVUcYHNiwH+zVsR+79z85sGihc3cFZkvZUqzfbVYbYYzAXh37sXv/09dgeZgdijbutirAtVL1ZqS1EcYIPEiSAmvPa7d2M8hL2Va/AdgvCYH5+dDScHXjYPNxJxm4XqrcbbZeDsA0KYE1CtdBDr6UbXeTle9VWY0wlq3okWU4YHbTMzewrVC+laGpf2E4YP2hiQcRlDokHXBmAbbeLgcvZatv7V6VowE+vU007lAQN05ydCNJS9kNrNbfkTTRpxaY7COpFXjXeV/omfMSFOVelcuxHzxILMBvvCENzK9fnxfXy1SD0lcNBRb7SMqZHLal7PBltzI0ts85APOYge9+8po0dH5r63yxtbX14WpQ+qoBwNpOMOY9dt/YfebyZbcy5B3AWvHgwP/09//MgVOfxSHHDHzyYgl8++yNorhR/r9B/isuvlYVSl/VB5gteryNpa6CkXrkUo/7lsKO/SucoYH/qvzT/pYBpz6LQ46lib5WAj+WP30rv3o1p8DXSKU+/6lPz0WO7s9dOTwsfyBEXlcb0ayEHaScirdqQfIE5lJ7+cGR8y87OXYWu0vnhXh5r7boKuA/LP+2PxLA9OSNj7/++scTns3RAPzI5ubu5rlzmxXw/IVXccGLl57fFzk42XflYLa/jxXKl6TGZryEH4auSk2pAJ2FttIys0PnX3bsLj5ylu4X4uWeBfj3y7/uDwQwPXljffuVJxKezdEAvFFs3aUvyv9uPcPKpMbK3URn2Q7fTlIPU1a2+gXB6sSuBrihfc6Hb6L/rPzr/lwDxidvJD2bowH4tbMvnZSNUVHc3Ni42QK4uuiIvcYefJhtQjcYWu822ug7OPDdv/yTv7irAe899dTjzxcJz+ZwALsifVV/4EoEX2VWDi0WC3EytHlip2Aj7wiA5d0keu+G4tIlsrmV7GwOnD4PdEjAtMqR7sEKGDn3kVyE9LOa73TnyDj3g/u4l0OiI1nMt6rVBBjhO3U4ES2furrAfdzLIQ2wWGcSYuKLlM5hNR4VuPlWhq6ME7iP9A0sDiUSMWnvqLplg3G6BRm5y32yAJilZ2BkAc6d/UgOYL6DNFrgB3oiKHVI78DVyRZyAy2fS2mcrFxF24DFZKMFPpqpOYqg1CH9AiMZmGo2AiN830rkuk8WuZJ/rMCnqolGUnd89TJjFwS7fRfGpcw/Z0W3ovtIihos11beSFv3gfm4ltsosSYBgL2SYjepVoEZsOG5SdIekus2SgDsm55rMPmFxKntmdLNYHowlkAMv8uOlFEB71/+InkYw8v7EcQC0yswa095TxLfRyIHsgzPTZJPvgm/y46UoYHzzz555TYD3p5cIg9jWE/yUHc1KYBJpSVvkFv3k5N3yCk7dWBkA65tdY8c+OqTTz75Mxz4gx97vQS+cv7dEcQC0ycw2+TNmHAmgJERGNmA67tVIwe+VAL/KAdef+KV7fWTZz/7ni9HIAtL/xtZ6iZWdRADkY6lGrBqWLuFg7LVPXLgl0rgXxDAxdr2+vb7Ll9Of3ZWGmDaRJOOBtHRHwwsjzty4Lc+d/mXdiTg5z+0/viFCxc+81wEs6CkrcGkp59f
L6oDIxuwwXfswMpWNN7AmqzTJ3EkTu/ASgstAec1YB1Ru0eHdlRkmYCHTBrgqdjEQuJMHk9goy8AeyZRDcZDGavBDEsFrjH63qPDGACmSQNMhsRBLFqHtaeu6Iyet3AwB4Bpej+jg/uIo5RZALDNd7zAxwdqYp4i2SKJgafVTlMd2MAoARs+e7TAI0v/Z3Sg6pozqZOQeUnAJseZ8QAHCwD7JQmwaKGVM7FU4NplDDNHAw3AvkkFzE/FmppqsFFx5jx7FoD90v8ZHVIF1rw4sBlyRt+GGtwlPQMvfIAt52dxX1gHd0gCYN7RoFdUBmxpiOm7ANwp/Xb4k0OTZOvJdC0wBTaffofkQ5WGzwZgvyQA5j2FujAFNlZgVJ06aw8A+6VXYHKGs9JCZ3JxBazePkue1PnZAOyXPoBZb1F9H4kdha5CgJF6i2EaBMCR0jtwWYEVXx3YsgZG5OoVRwDYLz0Ac8UKeCpdclYHNtRev6UMwH7pERiRbSwkXc2AMn0dbGyfvZYyAPulx40sRC8TzOTIIxzvoqlDGIBjpJcaXP1GdEtJaaHlEY+njjslAXCc9NNEk9/sOl9j7cU5NvUh+S5lAPZLfGB2GUO9hdbHNB2DNp8XbQgA+6UP4IqJXsgtb2JpYxp9xTsAHCO91GDeC4Qy+yZWOUJ9AwuAYyc6cCZuwCGfylEHtnUheS5lAPZLH8A5yuglSTJwXvd1H6sC4BjpAzir7kmJpLPtUFZf2WaZ6RYOvksZgP0SG5gedaZHn3dyXoMNW1OmK/y9lzIA+6UnYMSB7b4qcL3zCIBjpAdghHgVFsDa5jIAp0ofNThTN50NBzmqDSwOXBYb+n8BOEYiAzNN2hXMfdUKvKiOUTLgqtYDcC/poQbjX4QX7RLbrHZjaKTWYMsTogE4RvqowfyU5l1+lFIZidVWAE6QHoGnHFjt6OfHuGA3KUF6AM7xrbGUGsyB8apYnP4OwAnSDzBudMvfvIlmvrhzQRyjBOAEiQss7SThFW1Gb9nAijGwdAwagBOkN+BswR6eI1bA9BZobJ8JgBOkD+BqDUyB1VNlq/MsaQA4QaICZwwY2y7obSl1YCFcAteOcXkvZQD2Sw/ArHnm/Q5ihAU9D6/K8W79IKb3UgZgv/QCvJCA9forXZECNThBegBGogIbgJEYlwC3XcoA7JcWwHdEZkfSwB1Cin+xGkw2mjNeTt6WR96/48qBs3TvvrP47sxZfDjvMOs7xS5/uROfJG7iA4suQkRu3c+LFV/2r6H1UgZgv8RsoqWjHFUfIXk4Ay/WunydG1jN7SQ00X6JDSydCE3P7GClCwVYvxg8eCkDsF+iA2cKsCis+cJJd0kSEVi2zfVdYNmXnRoPwAnSDzCrwPwR79Ue0pSNp52T1WYpA7BfYgMrzAK4utqfjwbAyRK/BvODHFm2y4BFJz9vn3MATpJ4wPJxSlpLjcBwRkfS9AmcZRRY8pVfA3CCxAfOpH6GCpjfCiuT2uccgJOkB2Bxurv8gGg6inzVKAAnSDTg2j4SByZn6Uzz+h1nAThB+gGmjgKYXQmuHN0C4ASJD4w0YNFAI6X+AnCSRAZeEGDmqD35TPMF4BSJBaxsQUvAkqlegQE4ReIC0zufTenJ7vSxOtUYNV8ATpHowIs84938FXBufCJHDsBJEhWY3sKf399sV2xAG3wBOEViAlc3L8zELYL1DWgATp9IwKICLzJ1tctf1HwBOEWiA0s7Q/yV2ReAUyQiMI0yvKgAAAWLSURBVBKXI5H3NN/6ggLgBIlZg+lusFqBzRvQJACcIHGAmS89EE3eo10Meg+DFABOkIjAiF9xht8iqjvSKXZS6F4UACdIPGDET+Ugb5Gbne2YVsB8LwqAEyQe8IJtYpF3qtc7puYZgFMmGjDiwOSJHOT1rql9pjfbyQE4SaIAs73gTK7AuL2uyG0LCoATJBowv+cKfoPuITU8QxSAEyQWMFIa6Or3AoCHTyzgBb8vB99wxj+c
DwEG4ASJASx3NOCdJCTFtaAAOEEiASNWg5mv9QC0FABOkKg1mBzlkBpo6fJRUwA4QSICk3v4s3Mnyc9dAB48EYD5+bJClv4E4OETBxjxfaSFvH0FwMMnUg1mwAiAR5Y4wOw4dM6h+ZUNjgBwgkQB5v1IvKmu9o8AePh0B844cHVocjH1Ba49HjpkKQOwX6IBY11Sd/F9+6d2YKyK+wunNFn9MeBeSxmA/RINOJN82QEsDjzlv6em6E9G81nKAOyXGMALGbhsonmNFMBVtbXFfFATgGOkMzDvaGCbz3VgB2zVngNwf4kArPoi6Z79FbCr4tLqC8C9JT6wRNUM7OpWBOAYidhEL2pSGLgB1tGpCMAx0hVY99WBUYNv66UMwH7pDqz6WoCRoerCkawUiQZs8GWytSYZbmWYLrGA676mSquOBMAJEglY9+WHrFwrXABOkI7AoqehAqxtU3FcwxNWADhBugLL9bc6coH4NhUHtiwoAE6QzsCZOMRBD01N+W+p38EUAE6Q7k10hqTIxyAtfQgiAJwg3YB138p1SmsznDY7gnQGrvlKFReAh09HYKmPwbA7BMDDpxuw0VcUA/DwMQPPr1+fa0PiLfHlxOnQ5kMZADx8zMDnt7bOa0PiLfHl2EGOqWVzGYCHjxl4A/93++yNorhxgw6RH0Xx4qXn93noUQ68XbVvysHM+DbL/bmz+MhZOnvgLj50Fh+7i92z3i/Ey720XOFxAD+WP30rv3o1V4DPf+rTc57qPPe5NYeH9rIy993Fx87SwwfO4oMjZ/GJ+8PdpeX6iudeWq7wOIAf2dzc3Tx3blMBLuQmeuG+Bwc00SOIA3ij2LpbXLtW2IG1J4DXAsDDxwx8c2PjZvHa2ZdOysaIDpEfJNJXBeAlBXZG+qoADMCOAHCCAHD4rAGYBYCHDwCHzxqAWQB4+ABw+KwBmAWAhw8Ah88agFkAePgAcPisAZgFgIcPAIfPGoBZAHj4AHD4rAGYBYCHTwtgKde+t8vU3/XbHSb+47NdZv3pqx0mPnz/QZd5p0034J/9zi5TP/pyh4lffX+XWX/P5Q4TH37jvHmksQRqcHhOUQ2GjD4AvOIJBaaXKDkvXnJNfL31xEXxxhvSXxA6NZ/4eouJO33rYRMKTC9Rcl68ZM2tT2xttZ64uPvJa9JfEDg1nZj+BaGz7vSth00oMD3/3X7xkiu3nhGfETxxcfLitfZT04nJXxA+67zLtx42XYCNFy+5kn/kIzfNE9+sSty5JoDVqZ/I88f8JiZ/QfjExTx/ov2sB00XYNvFSw3Tt55YAlan3t3cfMRv4qLdxPMXNnfbz3rQdAG2XbxkzcUuEyvA6tQXPaauJr7YbuLzXyg6zHrQhAKTS5RuX3RevNTPxJVR66k7TfyBjY2NDn/4kIH94BUPAK94AHjFA8ArHgBe8QDwimekwJMy1Qs2SN+c1Eahvyd0bDqgjnuKM9KFIJPxoYk+Bn9zUgMe7TdLnZEuBqEkWdWBCxP0BICljHQx8Pa3YM1woTXRAOyXkS4GqVEW1dezBqvV/rRnpIuBAVd11roOLgC4KSNdDGL9K3n5bWQV0oYWZKyLQdRcVpOLht0keXcJgEVgMax4lg94AscwQgJLasUDwCseAF7xAPCKB4BXPP8P+u1PVeX6KcsAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<IPython.core.display.Image object>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scatter plot of BASE_COUNT (x) against READ_COUNT (y) from yri_ceu;\n",
    "# point shape encodes POPULATION and colour encodes ANALYSIS_GROUP.\n",
    "scatter = (\n",
    "    ggplot2.ggplot(yri_ceu)\n",
    "    + ggplot2.aes_string(x='BASE_COUNT', y='READ_COUNT',\n",
    "                         shape='factor(POPULATION)',\n",
    "                         col='factor(ANALYSIS_GROUP)')\n",
    "    + ggplot2.geom_point()\n",
    ")\n",
    "# Resolve R's dev.off before opening the device, so a failed lookup\n",
    "# cannot leave a freshly opened device behind.\n",
    "dev_off = robjects.r('dev.off')\n",
    "# Open an R PNG graphics device writing to out.png.\n",
    "robjects.r.png('out.png')\n",
    "try:\n",
    "    scatter.plot()\n",
    "finally:\n",
    "    # Always close the R graphics device, even when rendering raises;\n",
    "    # otherwise the PNG device would stay open in the R session.\n",
    "    dev_off()\n",
    "# Last expression of the cell: display the rendered file inline.\n",
    "Image(filename='out.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>STUDY_ID</th>\n",
       "      <th>STUDY_NAME</th>\n",
       "      <th>CENTER_NAME</th>\n",
       "      <th>SAMPLE_ID</th>\n",
       "      <th>SAMPLE_NAME</th>\n",
       "      <th>POPULATION</th>\n",
       "      <th>INSTRUMENT_PLATFORM</th>\n",
       "      <th>LIBRARY_LAYOUT</th>\n",
       "      <th>PAIRED_FASTQ</th>\n",
       "      <th>READ_COUNT</th>\n",
       "      <th>BASE_COUNT</th>\n",
       "      <th>ANALYSIS_GROUP</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>9280498</td>\n",
       "      <td>334097928</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>9571982</td>\n",
       "      <td>344591352</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000214</td>\n",
       "      <td>NA19240</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>149044</td>\n",
       "      <td>5365584</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000214</td>\n",
       "      <td>NA19240</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA19240/sequence_read/ERR000020_2.filt.fa...</td>\n",
       "      <td>2057690</td>\n",
       "      <td>74076840</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000214</td>\n",
       "      <td>NA19240</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA19240/sequence_read/ERR000020_1.filt.fa...</td>\n",
       "      <td>2057690</td>\n",
       "      <td>74076840</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>9388168</td>\n",
       "      <td>337974048</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>7762958</td>\n",
       "      <td>279466488</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>9625450</td>\n",
       "      <td>385018000</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>8808642</td>\n",
       "      <td>317111112</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000214</td>\n",
       "      <td>NA19240</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>15187</td>\n",
       "      <td>683415</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000214</td>\n",
       "      <td>NA19240</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA19240/sequence_read/ERR000025_2.filt.fa...</td>\n",
       "      <td>2159324</td>\n",
       "      <td>97169580</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000214</td>\n",
       "      <td>NA19240</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA19240/sequence_read/ERR000025_1.filt.fa...</td>\n",
       "      <td>2159324</td>\n",
       "      <td>97169580</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000213</td>\n",
       "      <td>NA19239</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>59312</td>\n",
       "      <td>2669040</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000213</td>\n",
       "      <td>NA19239</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA19239/sequence_read/ERR000027_2.filt.fa...</td>\n",
       "      <td>5080128</td>\n",
       "      <td>228605760</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000213</td>\n",
       "      <td>NA19239</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA19239/sequence_read/ERR000027_1.filt.fa...</td>\n",
       "      <td>5080128</td>\n",
       "      <td>228605760</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>11752662</td>\n",
       "      <td>423095832</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>229179</td>\n",
       "      <td>10313055</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA19238/sequence_read/ERR000030_2.filt.fa...</td>\n",
       "      <td>7692812</td>\n",
       "      <td>346176540</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA19238/sequence_read/ERR000030_1.filt.fa...</td>\n",
       "      <td>7692812</td>\n",
       "      <td>346176540</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>11402532</td>\n",
       "      <td>410491152</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>6777368</td>\n",
       "      <td>243985248</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000213</td>\n",
       "      <td>NA19239</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>78918</td>\n",
       "      <td>2841048</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000213</td>\n",
       "      <td>NA19239</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA19239/sequence_read/ERR000034_2.filt.fa...</td>\n",
       "      <td>1131253</td>\n",
       "      <td>40725108</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000213</td>\n",
       "      <td>NA19239</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA19239/sequence_read/ERR000034_1.filt.fa...</td>\n",
       "      <td>1131253</td>\n",
       "      <td>40725108</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>12013717</td>\n",
       "      <td>432493812</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>8045886</td>\n",
       "      <td>289651896</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>9081298</td>\n",
       "      <td>326926728</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>10130502</td>\n",
       "      <td>364698072</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>8632879</td>\n",
       "      <td>310783644</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>SRP000032</td>\n",
       "      <td>1000Genomes Project Pilot 2</td>\n",
       "      <td>BGI</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>SINGLE</td>\n",
       "      <td></td>\n",
       "      <td>8108919</td>\n",
       "      <td>291921084</td>\n",
       "      <td>high coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178036</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000205</td>\n",
       "      <td>NA19201</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>61270</td>\n",
       "      <td>6127000</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178039</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000208</td>\n",
       "      <td>NA19207</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>61280</td>\n",
       "      <td>6128000</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178054</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>55356</td>\n",
       "      <td>5535600</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178063</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000209</td>\n",
       "      <td>NA19209</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>67550</td>\n",
       "      <td>6755000</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178081</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000213</td>\n",
       "      <td>NA19239</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>51922</td>\n",
       "      <td>5192200</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178084</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000213</td>\n",
       "      <td>NA19239</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>56870</td>\n",
       "      <td>5687000</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178096</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000195</td>\n",
       "      <td>NA19144</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>873195</td>\n",
       "      <td>87319500</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178099</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000195</td>\n",
       "      <td>NA19144</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>821999</td>\n",
       "      <td>82199900</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178117</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000214</td>\n",
       "      <td>NA19240</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>50397</td>\n",
       "      <td>5039700</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178135</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000214</td>\n",
       "      <td>NA19240</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>55358</td>\n",
       "      <td>5535800</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178144</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000207</td>\n",
       "      <td>NA19206</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>64122</td>\n",
       "      <td>6412200</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178171</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000199</td>\n",
       "      <td>NA19159</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>72884</td>\n",
       "      <td>7288400</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178183</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000182</td>\n",
       "      <td>NA19098</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>980198</td>\n",
       "      <td>98019800</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178189</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000182</td>\n",
       "      <td>NA19098</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>1062464</td>\n",
       "      <td>106246400</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178225</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000198</td>\n",
       "      <td>NA19153</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>785777</td>\n",
       "      <td>78577700</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178282</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000212</td>\n",
       "      <td>NA19238</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>59886</td>\n",
       "      <td>5988600</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178312</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000205</td>\n",
       "      <td>NA19201</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>63977</td>\n",
       "      <td>6397700</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178315</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000208</td>\n",
       "      <td>NA19207</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>63850</td>\n",
       "      <td>6385000</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178324</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000209</td>\n",
       "      <td>NA19209</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>63639</td>\n",
       "      <td>6363900</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178327</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000207</td>\n",
       "      <td>NA19206</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>67132</td>\n",
       "      <td>6713200</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178480</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000199</td>\n",
       "      <td>NA19159</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>68438</td>\n",
       "      <td>6843800</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178750</th>\n",
       "      <td>SRP004078</td>\n",
       "      <td>1000 Genomes CEPH (Utah residents with ancestr...</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000075</td>\n",
       "      <td>NA12750</td>\n",
       "      <td>CEU</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>67944</td>\n",
       "      <td>6794400</td>\n",
       "      <td>exome</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NA.298</th>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>ACB</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>-2147483648</td>\n",
       "      <td>-2147483648</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NA.299</th>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>ACB</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>-2147483648</td>\n",
       "      <td>-2147483648</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178759</th>\n",
       "      <td>SRP004078</td>\n",
       "      <td>1000 Genomes CEPH (Utah residents with ancestr...</td>\n",
       "      <td>WUGSC</td>\n",
       "      <td>SRS000075</td>\n",
       "      <td>NA12750</td>\n",
       "      <td>CEU</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>44974</td>\n",
       "      <td>4497400</td>\n",
       "      <td>exome</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NA.300</th>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>ACB</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>-2147483648</td>\n",
       "      <td>-2147483648</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NA.301</th>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>ACB</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>NA</td>\n",
       "      <td>-2147483648</td>\n",
       "      <td>-2147483648</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>184746</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>BI</td>\n",
       "      <td>SRS000096</td>\n",
       "      <td>NA18499</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td></td>\n",
       "      <td>871763</td>\n",
       "      <td>88048063</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>184747</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>BI</td>\n",
       "      <td>SRS000096</td>\n",
       "      <td>NA18499</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA18499/sequence_read/SRR797225_2.filt.fa...</td>\n",
       "      <td>8623980</td>\n",
       "      <td>871021980</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>184748</th>\n",
       "      <td>SRP000542</td>\n",
       "      <td>1000 Genomes YRI Yoruba population sequencing</td>\n",
       "      <td>BI</td>\n",
       "      <td>SRS000096</td>\n",
       "      <td>NA18499</td>\n",
       "      <td>YRI</td>\n",
       "      <td>ILLUMINA</td>\n",
       "      <td>PAIRED</td>\n",
       "      <td>data/NA18499/sequence_read/SRR797225_1.filt.fa...</td>\n",
       "      <td>8623980</td>\n",
       "      <td>871021980</td>\n",
       "      <td>low coverage</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>25251 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         STUDY_ID                                         STUDY_NAME  \\\n",
       "1       SRP000032                        1000Genomes Project Pilot 2   \n",
       "2       SRP000032                        1000Genomes Project Pilot 2   \n",
       "3       SRP000032                        1000Genomes Project Pilot 2   \n",
       "4       SRP000032                        1000Genomes Project Pilot 2   \n",
       "5       SRP000032                        1000Genomes Project Pilot 2   \n",
       "6       SRP000032                        1000Genomes Project Pilot 2   \n",
       "7       SRP000032                        1000Genomes Project Pilot 2   \n",
       "8       SRP000032                        1000Genomes Project Pilot 2   \n",
       "9       SRP000032                        1000Genomes Project Pilot 2   \n",
       "10      SRP000032                        1000Genomes Project Pilot 2   \n",
       "11      SRP000032                        1000Genomes Project Pilot 2   \n",
       "12      SRP000032                        1000Genomes Project Pilot 2   \n",
       "13      SRP000032                        1000Genomes Project Pilot 2   \n",
       "14      SRP000032                        1000Genomes Project Pilot 2   \n",
       "15      SRP000032                        1000Genomes Project Pilot 2   \n",
       "16      SRP000032                        1000Genomes Project Pilot 2   \n",
       "17      SRP000032                        1000Genomes Project Pilot 2   \n",
       "18      SRP000032                        1000Genomes Project Pilot 2   \n",
       "19      SRP000032                        1000Genomes Project Pilot 2   \n",
       "20      SRP000032                        1000Genomes Project Pilot 2   \n",
       "21      SRP000032                        1000Genomes Project Pilot 2   \n",
       "22      SRP000032                        1000Genomes Project Pilot 2   \n",
       "23      SRP000032                        1000Genomes Project Pilot 2   \n",
       "24      SRP000032                        1000Genomes Project Pilot 2   \n",
       "25      SRP000032                        1000Genomes Project Pilot 2   \n",
       "26      SRP000032                        1000Genomes Project Pilot 2   \n",
       "27      SRP000032                        1000Genomes Project Pilot 2   \n",
       "28      SRP000032                        1000Genomes Project Pilot 2   \n",
       "29      SRP000032                        1000Genomes Project Pilot 2   \n",
       "30      SRP000032                        1000Genomes Project Pilot 2   \n",
       "...           ...                                                ...   \n",
       "178036  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178039  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178054  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178063  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178081  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178084  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178096  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178099  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178117  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178135  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178144  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178171  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178183  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178189  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178225  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178282  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178312  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178315  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178324  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178327  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178480  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "178750  SRP004078  1000 Genomes CEPH (Utah residents with ancestr...   \n",
       "NA.298         NA                                                 NA   \n",
       "NA.299         NA                                                 NA   \n",
       "178759  SRP004078  1000 Genomes CEPH (Utah residents with ancestr...   \n",
       "NA.300         NA                                                 NA   \n",
       "NA.301         NA                                                 NA   \n",
       "184746  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "184747  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "184748  SRP000542      1000 Genomes YRI Yoruba population sequencing   \n",
       "\n",
       "       CENTER_NAME  SAMPLE_ID SAMPLE_NAME POPULATION INSTRUMENT_PLATFORM  \\\n",
       "1              BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "2              BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "3              BGI  SRS000214     NA19240        YRI            ILLUMINA   \n",
       "4              BGI  SRS000214     NA19240        YRI            ILLUMINA   \n",
       "5              BGI  SRS000214     NA19240        YRI            ILLUMINA   \n",
       "6              BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "7              BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "8              BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "9              BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "10             BGI  SRS000214     NA19240        YRI            ILLUMINA   \n",
       "11             BGI  SRS000214     NA19240        YRI            ILLUMINA   \n",
       "12             BGI  SRS000214     NA19240        YRI            ILLUMINA   \n",
       "13             BGI  SRS000213     NA19239        YRI            ILLUMINA   \n",
       "14             BGI  SRS000213     NA19239        YRI            ILLUMINA   \n",
       "15             BGI  SRS000213     NA19239        YRI            ILLUMINA   \n",
       "16             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "17             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "18             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "19             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "20             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "21             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "22             BGI  SRS000213     NA19239        YRI            ILLUMINA   \n",
       "23             BGI  SRS000213     NA19239        YRI            ILLUMINA   \n",
       "24             BGI  SRS000213     NA19239        YRI            ILLUMINA   \n",
       "25             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "26             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "27             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "28             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "29             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "30             BGI  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "...            ...        ...         ...        ...                 ...   \n",
       "178036       WUGSC  SRS000205     NA19201        YRI            ILLUMINA   \n",
       "178039       WUGSC  SRS000208     NA19207        YRI            ILLUMINA   \n",
       "178054       WUGSC  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "178063       WUGSC  SRS000209     NA19209        YRI            ILLUMINA   \n",
       "178081       WUGSC  SRS000213     NA19239        YRI            ILLUMINA   \n",
       "178084       WUGSC  SRS000213     NA19239        YRI            ILLUMINA   \n",
       "178096       WUGSC  SRS000195     NA19144        YRI            ILLUMINA   \n",
       "178099       WUGSC  SRS000195     NA19144        YRI            ILLUMINA   \n",
       "178117       WUGSC  SRS000214     NA19240        YRI            ILLUMINA   \n",
       "178135       WUGSC  SRS000214     NA19240        YRI            ILLUMINA   \n",
       "178144       WUGSC  SRS000207     NA19206        YRI            ILLUMINA   \n",
       "178171       WUGSC  SRS000199     NA19159        YRI            ILLUMINA   \n",
       "178183       WUGSC  SRS000182     NA19098        YRI            ILLUMINA   \n",
       "178189       WUGSC  SRS000182     NA19098        YRI            ILLUMINA   \n",
       "178225       WUGSC  SRS000198     NA19153        YRI            ILLUMINA   \n",
       "178282       WUGSC  SRS000212     NA19238        YRI            ILLUMINA   \n",
       "178312       WUGSC  SRS000205     NA19201        YRI            ILLUMINA   \n",
       "178315       WUGSC  SRS000208     NA19207        YRI            ILLUMINA   \n",
       "178324       WUGSC  SRS000209     NA19209        YRI            ILLUMINA   \n",
       "178327       WUGSC  SRS000207     NA19206        YRI            ILLUMINA   \n",
       "178480       WUGSC  SRS000199     NA19159        YRI            ILLUMINA   \n",
       "178750       WUGSC  SRS000075     NA12750        CEU            ILLUMINA   \n",
       "NA.298          NA         NA          NA        ACB                  NA   \n",
       "NA.299          NA         NA          NA        ACB                  NA   \n",
       "178759       WUGSC  SRS000075     NA12750        CEU            ILLUMINA   \n",
       "NA.300          NA         NA          NA        ACB                  NA   \n",
       "NA.301          NA         NA          NA        ACB                  NA   \n",
       "184746          BI  SRS000096     NA18499        YRI            ILLUMINA   \n",
       "184747          BI  SRS000096     NA18499        YRI            ILLUMINA   \n",
       "184748          BI  SRS000096     NA18499        YRI            ILLUMINA   \n",
       "\n",
       "       LIBRARY_LAYOUT                                       PAIRED_FASTQ  \\\n",
       "1              SINGLE                                                      \n",
       "2              SINGLE                                                      \n",
       "3              PAIRED                                                      \n",
       "4              PAIRED  data/NA19240/sequence_read/ERR000020_2.filt.fa...   \n",
       "5              PAIRED  data/NA19240/sequence_read/ERR000020_1.filt.fa...   \n",
       "6              SINGLE                                                      \n",
       "7              SINGLE                                                      \n",
       "8              SINGLE                                                      \n",
       "9              SINGLE                                                      \n",
       "10             PAIRED                                                      \n",
       "11             PAIRED  data/NA19240/sequence_read/ERR000025_2.filt.fa...   \n",
       "12             PAIRED  data/NA19240/sequence_read/ERR000025_1.filt.fa...   \n",
       "13             PAIRED                                                      \n",
       "14             PAIRED  data/NA19239/sequence_read/ERR000027_2.filt.fa...   \n",
       "15             PAIRED  data/NA19239/sequence_read/ERR000027_1.filt.fa...   \n",
       "16             SINGLE                                                      \n",
       "17             PAIRED                                                      \n",
       "18             PAIRED  data/NA19238/sequence_read/ERR000030_2.filt.fa...   \n",
       "19             PAIRED  data/NA19238/sequence_read/ERR000030_1.filt.fa...   \n",
       "20             SINGLE                                                      \n",
       "21             SINGLE                                                      \n",
       "22             PAIRED                                                      \n",
       "23             PAIRED  data/NA19239/sequence_read/ERR000034_2.filt.fa...   \n",
       "24             PAIRED  data/NA19239/sequence_read/ERR000034_1.filt.fa...   \n",
       "25             SINGLE                                                      \n",
       "26             SINGLE                                                      \n",
       "27             SINGLE                                                      \n",
       "28             SINGLE                                                      \n",
       "29             SINGLE                                                      \n",
       "30             SINGLE                                                      \n",
       "...               ...                                                ...   \n",
       "178036         PAIRED                                                      \n",
       "178039         PAIRED                                                      \n",
       "178054         PAIRED                                                      \n",
       "178063         PAIRED                                                      \n",
       "178081         PAIRED                                                      \n",
       "178084         PAIRED                                                      \n",
       "178096         PAIRED                                                      \n",
       "178099         PAIRED                                                      \n",
       "178117         PAIRED                                                      \n",
       "178135         PAIRED                                                      \n",
       "178144         PAIRED                                                      \n",
       "178171         PAIRED                                                      \n",
       "178183         PAIRED                                                      \n",
       "178189         PAIRED                                                      \n",
       "178225         PAIRED                                                      \n",
       "178282         PAIRED                                                      \n",
       "178312         PAIRED                                                      \n",
       "178315         PAIRED                                                      \n",
       "178324         PAIRED                                                      \n",
       "178327         PAIRED                                                      \n",
       "178480         PAIRED                                                      \n",
       "178750         PAIRED                                                      \n",
       "NA.298             NA                                                 NA   \n",
       "NA.299             NA                                                 NA   \n",
       "178759         PAIRED                                                      \n",
       "NA.300             NA                                                 NA   \n",
       "NA.301             NA                                                 NA   \n",
       "184746         PAIRED                                                      \n",
       "184747         PAIRED  data/NA18499/sequence_read/SRR797225_2.filt.fa...   \n",
       "184748         PAIRED  data/NA18499/sequence_read/SRR797225_1.filt.fa...   \n",
       "\n",
       "        READ_COUNT  BASE_COUNT ANALYSIS_GROUP  \n",
       "1          9280498   334097928  high coverage  \n",
       "2          9571982   344591352  high coverage  \n",
       "3           149044     5365584  high coverage  \n",
       "4          2057690    74076840  high coverage  \n",
       "5          2057690    74076840  high coverage  \n",
       "6          9388168   337974048  high coverage  \n",
       "7          7762958   279466488  high coverage  \n",
       "8          9625450   385018000  high coverage  \n",
       "9          8808642   317111112  high coverage  \n",
       "10           15187      683415  high coverage  \n",
       "11         2159324    97169580  high coverage  \n",
       "12         2159324    97169580  high coverage  \n",
       "13           59312     2669040  high coverage  \n",
       "14         5080128   228605760  high coverage  \n",
       "15         5080128   228605760  high coverage  \n",
       "16        11752662   423095832  high coverage  \n",
       "17          229179    10313055  high coverage  \n",
       "18         7692812   346176540  high coverage  \n",
       "19         7692812   346176540  high coverage  \n",
       "20        11402532   410491152  high coverage  \n",
       "21         6777368   243985248  high coverage  \n",
       "22           78918     2841048  high coverage  \n",
       "23         1131253    40725108  high coverage  \n",
       "24         1131253    40725108  high coverage  \n",
       "25        12013717   432493812  high coverage  \n",
       "26         8045886   289651896  high coverage  \n",
       "27         9081298   326926728  high coverage  \n",
       "28        10130502   364698072  high coverage  \n",
       "29         8632879   310783644  high coverage  \n",
       "30         8108919   291921084  high coverage  \n",
       "...            ...         ...            ...  \n",
       "178036       61270     6127000   low coverage  \n",
       "178039       61280     6128000   low coverage  \n",
       "178054       55356     5535600   low coverage  \n",
       "178063       67550     6755000   low coverage  \n",
       "178081       51922     5192200   low coverage  \n",
       "178084       56870     5687000   low coverage  \n",
       "178096      873195    87319500   low coverage  \n",
       "178099      821999    82199900   low coverage  \n",
       "178117       50397     5039700   low coverage  \n",
       "178135       55358     5535800   low coverage  \n",
       "178144       64122     6412200   low coverage  \n",
       "178171       72884     7288400   low coverage  \n",
       "178183      980198    98019800   low coverage  \n",
       "178189     1062464   106246400   low coverage  \n",
       "178225      785777    78577700   low coverage  \n",
       "178282       59886     5988600   low coverage  \n",
       "178312       63977     6397700   low coverage  \n",
       "178315       63850     6385000   low coverage  \n",
       "178324       63639     6363900   low coverage  \n",
       "178327       67132     6713200   low coverage  \n",
       "178480       68438     6843800   low coverage  \n",
       "178750       67944     6794400          exome  \n",
       "NA.298 -2147483648 -2147483648             NA  \n",
       "NA.299 -2147483648 -2147483648             NA  \n",
       "178759       44974     4497400          exome  \n",
       "NA.300 -2147483648 -2147483648             NA  \n",
       "NA.301 -2147483648 -2147483648             NA  \n",
       "184746      871763    88048063   low coverage  \n",
       "184747     8623980   871021980   low coverage  \n",
       "184748     8623980   871021980   low coverage  \n",
       "\n",
       "[25251 rows x 12 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert the R object `yri_ceu` to its Python-side equivalent through rpy2's pandas2ri bridge.\n",
    "pd_yri_ceu = pandas2ri.ri2py(yri_ceu)\n",
    "# Print the Python type of the converted object.\n",
    "print(type(pd_yri_ceu))\n",
    "# NOTE(review): the saved output of this cell contains all-NA rows labelled `NA.<n>` whose\n",
    "# READ_COUNT / BASE_COUNT read -2147483648 (the value R uses internally for NA_integer_).\n",
    "# They presumably originate from NA values in the R-side filter that built `yri_ceu` --\n",
    "# confirm, and drop them upstream if they are not wanted.\n",
    "pd_yri_ceu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " [1]\n",
      " \"STUDY_ID\"           \n",
      " \"STUDY_NAME\"         \n",
      " \"CENTER_NAME\"        \n",
      "\n",
      "\n",
      " [4]\n",
      " \"SAMPLE_ID\"          \n",
      " \"SAMPLE_NAME\"        \n",
      " \"POPULATION\"         \n",
      "\n",
      "\n",
      " [7]\n",
      " \"INSTRUMENT_PLATFORM\"\n",
      " \"LIBRARY_LAYOUT\"     \n",
      " \"READ_COUNT\"         \n",
      "\n",
      "\n",
      "[10]\n",
      " \"BASE_COUNT\"         \n",
      " \"ANALYSIS_GROUP\"     \n",
      "\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"STUDY_ID\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"STUDY_NAME\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"CENTER_NAME\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"SAMPLE_ID\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"SAMPLE_NAME\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"POPULATION\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"INSTRUMENT_PLATFORM\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"LIBRARY_LAYOUT\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"READ_COUNT\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"BASE_COUNT\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n",
      "/home/tiago_antao/anaconda3/lib/python3.6/site-packages/rpy2/robjects/pandas2ri.py:62: UserWarning: Error while trying to convert the column \"ANALYSIS_GROUP\". Fall back to string conversion. The error is: Conversion 'py2ri' not defined for objects of type '<class 'pandas.core.series.Series'>'\n",
      "  (name, str(e)))\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <span>StrVector with 11 elements.</span>\n",
       "    <table>\n",
       "      <tbody>\n",
       "      <tr>\n",
       "      \n",
       "      <td>\n",
       "        'STUDY_ID'\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        'STUDY_NA...\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        'CENTER_N...\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        'SAMPLE_ID'\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        ...\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        'LIBRARY_...\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        'READ_COU...\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        'BASE_COU...\n",
       "      </td>\n",
       "      \n",
       "      <td>\n",
       "        'ANALYSIS...\n",
       "      </td>\n",
       "      \n",
       "      </tr>\n",
       "      </tbody>\n",
       "    </table>\n",
       "    "
      ],
      "text/plain": [
       "R object with classes: ('character',) mapped to:\n",
       "<StrVector - Python:0x7fb6b6108488 / R:0x55ceb0b6c850>\n",
       "['STUDY_ID', 'STUDY_NA..., 'CENTER_N..., 'SAMPLE_ID', ..., 'LIBRARY_..., 'READ_COU..., 'BASE_COU..., 'ANALYSIS...]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Derive a new frame without PAIRED_FASTQ instead of `del`-ing the column in place:\n",
    "# the cell can be re-run without a KeyError, and the frame shown by the previous cell\n",
    "# is left untouched.\n",
    "pd_no_paired = pd_yri_ceu.drop(columns=['PAIRED_FASTQ'])\n",
    "# Convert the pandas frame back to an R object and bind it to the R name `no.paired`.\n",
    "no_paired = pandas2ri.py2ri(pd_no_paired)\n",
    "robjects.r.assign('no.paired', no_paired)\n",
    "# Ask R for the column names to confirm the column is gone on the R side as well.\n",
    "robjects.r(\"print(colnames(no.paired))\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
